library("psych")
## Warning: package 'psych' was built under R version 3.5.3
library("skimr")
## Warning: package 'skimr' was built under R version 3.5.3
##
## Attaching package: 'skimr'
## The following object is masked from 'package:stats':
##
## filter
library("plotly")
## Warning: package 'plotly' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
bank_marketing_train <- read.csv("../bank_marketing_train.csv")
# y=yes/noのデータを抽出
bank_marketing_train_y <- bank_marketing_train[bank_marketing_train$y=="yes",]
bank_marketing_train_n <- bank_marketing_train[bank_marketing_train$y=="no",]
# データ数
num_yes = dim(bank_marketing_train_y)[1]
num_no = dim(bank_marketing_train_n)[1]
# 年齢(age)
pl_yes <- plot_ly(x = bank_marketing_train_y$age, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$age, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# => yesの方が、60以上が多い
# 職業(job)
pl_yes <- plot_ly(x = bank_marketing_train_y$job, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$job, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_y$job)/num_yes
## admin. blue-collar entrepreneur housemaid management
## 0.290832455 0.138040042 0.027924131 0.023182297 0.069810327
## retired self-employed services student technician
## 0.094309800 0.029504742 0.069810327 0.060590095 0.158324552
## unemployed unknown
## 0.030031612 0.007639621
summary(bank_marketing_train_n$job)/num_no
## admin. blue-collar entrepreneur housemaid management
## 0.248163483 0.235975691 0.036630159 0.026512622 0.069921197
## retired self-employed services student technician
## 0.035862161 0.033992253 0.099839722 0.016862562 0.164551890
## unemployed unknown
## 0.023640978 0.008047282
# => yesの方が、retired/studentが多く、blue-colorが少ない。特にstudentは約4倍、retiredは約3倍違いがでている
# 最終学歴(education)
pl_yes <- plot_ly(x = bank_marketing_train_y$education, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$education, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_y$education)/num_yes
## basic.4y basic.6y basic.9y
## 0.091938883 0.038461538 0.104320337
## high.school illiterate professional.course
## 0.223129610 0.001053741 0.127239199
## university.degree unknown
## 0.356691254 0.057165437
summary(bank_marketing_train_n$education)/num_no
## basic.4y basic.6y basic.9y
## 0.1028115400 0.0558634967 0.1534660077
## high.school illiterate professional.course
## 0.2331708294 0.0003673033 0.1262855616
## university.degree unknown
## 0.2877320689 0.0403031922
# => yesはilliterateが多い
# 連絡デバイス(contact)
pl_yes <- plot_ly(x = bank_marketing_train_y$contact, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$contact, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# => yesはcellularが多い
# 以前のキャンペーン結果(campaign)
pl_yes <- plot_ly(x = bank_marketing_train_y$poutcome, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$poutcome, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_y$poutcome)/num_yes
## failure nonexistent success
## 0.1290832 0.6817703 0.1891465
summary(bank_marketing_train_n$poutcome)/num_no
## failure nonexistent success
## 0.09867103 0.88793909 0.01338988
# => yesはsuccessが多い
# 以前のキャンペーンの接触回数(previous)
pl_yes <- plot_ly(x = bank_marketing_train_y$previous, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_n$previous, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_y$previous)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.4871 1.0000 6.0000
summary(bank_marketing_train_n$previous)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1315 0.0000 7.0000
# => yesは平均値が大きい(yes:0.48, no:0.13)
## ロジスティック回帰
## 個人に紐づく、架電前に得られる説明変数のみ利用
lr<-glm(y~age+job+marital+default+education+housing+
loan+contact+day_of_week+pdays+poutcome+previous,
data=bank_marketing_train, family="binomial")
## step関数でAICを減らす
lr2 <- step(lr)
## Start: AIC=20763.61
## y ~ age + job + marital + default + education + housing + loan +
## contact + day_of_week + pdays + poutcome + previous
##
## Df Deviance AIC
## - housing 1 20690 20762
## <none> 20690 20764
## - loan 1 20693 20765
## - education 7 20712 20772
## - day_of_week 4 20707 20773
## - age 1 20702 20774
## - poutcome 2 20704 20774
## - previous 1 20706 20778
## - marital 3 20712 20780
## - pdays 1 20732 20804
## - default 2 20841 20911
## - job 11 20871 20923
## - contact 1 21013 21085
##
## Step: AIC=20762.21
## y ~ age + job + marital + default + education + loan + contact +
## day_of_week + pdays + poutcome + previous
##
## Df Deviance AIC
## <none> 20690 20762
## - loan 2 20694 20762
## - education 7 20713 20771
## - day_of_week 4 20708 20772
## - age 1 20703 20773
## - poutcome 2 20705 20773
## - previous 1 20706 20776
## - marital 3 20712 20778
## - pdays 1 20733 20803
## - default 2 20841 20909
## - job 11 20872 20922
## - contact 1 21013 21083
AIC(lr2)
## [1] 20762.21
summary(lr2)
##
## Call:
## glm(formula = y ~ age + job + marital + default + education +
## loan + contact + day_of_week + pdays + poutcome + previous,
## family = "binomial", data = bank_marketing_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0639 -0.5021 -0.3837 -0.3010 2.8803
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.063603 0.292051 -3.642 0.000271 ***
## age 0.007940 0.002243 3.541 0.000399 ***
## jobblue-collar -0.274764 0.072316 -3.800 0.000145 ***
## jobentrepreneur -0.199841 0.112115 -1.782 0.074673 .
## jobhousemaid -0.120341 0.131644 -0.914 0.360642
## jobmanagement -0.094809 0.078278 -1.211 0.225825
## jobretired 0.612476 0.095242 6.431 1.27e-10 ***
## jobself-employed -0.156807 0.110639 -1.417 0.156401
## jobservices -0.209717 0.079214 -2.647 0.008109 **
## jobstudent 0.788676 0.104768 7.528 5.16e-14 ***
## jobtechnician -0.143732 0.064162 -2.240 0.025082 *
## jobunemployed 0.148129 0.117128 1.265 0.205986
## jobunknown -0.178919 0.227348 -0.787 0.431292
## maritalmarried 0.113629 0.062912 1.806 0.070893 .
## maritalsingle 0.298159 0.071041 4.197 2.70e-05 ***
## maritalunknown 0.126908 0.392171 0.324 0.746239
## defaultunknown -0.701074 0.060878 -11.516 < 2e-16 ***
## defaultyes -8.610256 84.476695 -0.102 0.918817
## educationbasic.6y -0.018758 0.111917 -0.168 0.866895
## educationbasic.9y -0.133104 0.086594 -1.537 0.124265
## educationhigh.school -0.062314 0.084078 -0.741 0.458606
## educationilliterate 1.003878 0.635961 1.579 0.114446
## educationprofessional.course -0.003529 0.092894 -0.038 0.969695
## educationuniversity.degree 0.067691 0.083845 0.807 0.419475
## educationunknown 0.268525 0.108378 2.478 0.013224 *
## loanunknown 0.041182 0.121540 0.339 0.734733
## loanyes -0.101550 0.052615 -1.930 0.053602 .
## contacttelephone -0.805788 0.047171 -17.082 < 2e-16 ***
## day_of_weekmon -0.141551 0.060938 -2.323 0.020186 *
## day_of_weekthu 0.059907 0.058731 1.020 0.307711
## day_of_weektue 0.054050 0.059707 0.905 0.365336
## day_of_weekwed 0.066579 0.059795 1.113 0.265510
## pdays -0.001463 0.000220 -6.649 2.95e-11 ***
## poutcomenonexistent 0.167511 0.092493 1.811 0.070130 .
## poutcomesuccess 0.800045 0.214754 3.725 0.000195 ***
## previous 0.245300 0.062066 3.952 7.74e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 23735 on 33743 degrees of freedom
## Residual deviance: 20690 on 33708 degrees of freedom
## AIC: 20762
##
## Number of Fisher Scoring iterations: 9
# => 1回目の分析で特徴が出ていたもののうち、age, job, contact, previousは影響ありそう
# => Job:retiredの条件で、データを絞ってみてみる
bank_marketing_train_job_retired <- bank_marketing_train[bank_marketing_train$job == "retired",]
# y=yes/noのデータを抽出
bank_marketing_train_job_retired_y <- bank_marketing_train_job_retired[bank_marketing_train_job_retired$y=="yes",]
bank_marketing_train_job_retired_n <- bank_marketing_train_job_retired[bank_marketing_train_job_retired$y=="no",]
summary(bank_marketing_train_job_retired_y)
## age job marital
## Min. :33.00 retired :358 divorced: 78
## 1st Qu.:60.00 admin. : 0 married :270
## Median :68.00 blue-collar : 0 single : 10
## Mean :68.37 entrepreneur: 0 unknown : 0
## 3rd Qu.:76.00 housemaid : 0
## Max. :98.00 management : 0
## (Other) : 0
## education default housing loan
## basic.4y :148 no :330 no :149 no :297
## university.degree : 60 unknown: 28 unknown: 9 unknown: 9
## high.school : 54 yes : 0 yes :200 yes : 52
## professional.course: 45
## unknown : 28
## basic.9y : 14
## (Other) : 9
## contact day_of_week duration campaign
## cellular :319 fri:64 Min. : 63.0 Min. : 1.00
## telephone: 39 mon:61 1st Qu.: 188.2 1st Qu.: 1.00
## thu:62 Median : 311.0 Median : 1.00
## tue:88 Mean : 409.2 Mean : 1.95
## wed:83 3rd Qu.: 530.8 3rd Qu.: 2.00
## Max. :2093.0 Max. :17.00
##
## pdays previous poutcome emp.var.rate
## Min. : 2.00 Min. :0.0000 failure : 51 Min. :-3.400
## 1st Qu.: 9.25 1st Qu.:0.0000 nonexistent:209 1st Qu.:-2.900
## Median :999.00 Median :0.0000 success : 98 Median :-1.800
## Mean :713.19 Mean :0.6508 Mean :-1.939
## 3rd Qu.:999.00 3rd Qu.:1.0000 3rd Qu.:-1.700
## Max. :999.00 Max. :4.0000 Max. : 1.400
##
## cons.price.idx cons.conf.idx euribor3m nr.employed
## Min. :92.20 Min. :-50.80 Min. :0.6340 Min. :4964
## 1st Qu.:92.65 1st Qu.:-42.70 1st Qu.:0.7205 1st Qu.:5009
## Median :93.08 Median :-37.50 Median :0.8760 Median :5018
## Mean :93.24 Mean :-37.58 Mean :1.3329 Mean :5052
## 3rd Qu.:93.99 3rd Qu.:-31.40 3rd Qu.:1.3650 3rd Qu.:5099
## Max. :94.77 Max. :-26.90 Max. :4.9680 Max. :5228
##
## y
## no : 0
## yes:358
##
##
##
##
##
summary(bank_marketing_train_job_retired_n)
## age job marital
## Min. :23.00 retired :1074 divorced:211
## 1st Qu.:55.00 admin. : 0 married :791
## Median :58.00 blue-collar : 0 single : 68
## Mean :60.08 entrepreneur: 0 unknown : 4
## 3rd Qu.:63.75 housemaid : 0
## Max. :95.00 management : 0
## (Other) : 0
## education default housing loan
## basic.4y :339 no :785 no :496 no :888
## university.degree :182 unknown:289 unknown: 29 unknown: 29
## high.school :181 yes : 0 yes :549 yes :157
## professional.course:155
## basic.9y :107
## basic.6y : 58
## (Other) : 52
## contact day_of_week duration campaign
## cellular :724 fri:212 Min. : 1.0 Min. : 1.000
## telephone:350 mon:238 1st Qu.: 99.0 1st Qu.: 1.000
## thu:192 Median : 160.0 Median : 2.000
## tue:226 Mean : 228.6 Mean : 2.607
## wed:206 3rd Qu.: 282.8 3rd Qu.: 3.000
## Max. :2055.0 Max. :42.000
##
## pdays previous poutcome emp.var.rate
## Min. : 1.0 Min. :0.0000 failure :134 Min. :-3.4000
## 1st Qu.:999.0 1st Qu.:0.0000 nonexistent:902 1st Qu.:-1.8000
## Median :999.0 Median :0.0000 success : 38 Median : 1.1000
## Mean :957.4 Mean :0.2086 Mean :-0.2944
## 3rd Qu.:999.0 3rd Qu.:0.0000 3rd Qu.: 1.4000
## Max. :999.0 Max. :4.0000 Max. : 1.4000
##
## cons.price.idx cons.conf.idx euribor3m nr.employed y
## Min. :92.20 Min. :-50.8 Min. :0.635 Min. :4964 no :1074
## 1st Qu.:92.96 1st Qu.:-42.7 1st Qu.:0.993 1st Qu.:5076 yes: 0
## Median :93.44 Median :-38.3 Median :4.856 Median :5191
## Mean :93.48 Mean :-38.9 Mean :3.237 Mean :5146
## 3rd Qu.:93.99 3rd Qu.:-36.1 3rd Qu.:4.961 3rd Qu.:5228
## Max. :94.77 Max. :-26.9 Max. :4.970 Max. :5228
##
# データ数
num_retired_yes = dim(bank_marketing_train_job_retired_y)[1]
num_retired_no = dim(bank_marketing_train_job_retired_n)[1]
# 年齢
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$age, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$age, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# => yesの方が、60以上が多い
# 婚姻状況
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$marital, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$marital, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$marital)/num_retired_yes
## divorced married single unknown
## 0.21787709 0.75418994 0.02793296 0.00000000
summary(bank_marketing_train_job_retired_n$marital)/num_retired_no
## divorced married single unknown
## 0.196461825 0.736499069 0.063314711 0.003724395
# => yesはsingleが少ない
# クレジットの支払遅延
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$default, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$default, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$default)/num_retired_yes
## no unknown yes
## 0.92178771 0.07821229 0.00000000
summary(bank_marketing_train_job_retired_n$default)/num_retired_no
## no unknown yes
## 0.7309125 0.2690875 0.0000000
# => yesはunknownが少なく、9割が"no"
# 最終学歴
#plot_ly(x = bank_marketing_train_job_retired$education, type="histogram", color = bank_marketing_train_job_retired$y)
#plot_ly(x = bank_marketing_train_job_retired$education, type="box", color = bank_marketing_train_job_retired$y)
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$education, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$education, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$education)/num_retired_yes
## basic.4y basic.6y basic.9y
## 0.413407821 0.019553073 0.039106145
## high.school illiterate professional.course
## 0.150837989 0.005586592 0.125698324
## university.degree unknown
## 0.167597765 0.078212291
summary(bank_marketing_train_job_retired_n$education)/num_retired_no
## basic.4y basic.6y basic.9y
## 0.3156424581 0.0540037244 0.0996275605
## high.school illiterate professional.course
## 0.1685288641 0.0009310987 0.1443202980
## university.degree unknown
## 0.1694599628 0.0474860335
# => yesはbasic.4y, illiterate(学歴が高くない)が多い
# 不動産ローンの有無
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$housing, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$housing, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$housing)/num_retired_yes
## no unknown yes
## 0.41620112 0.02513966 0.55865922
summary(bank_marketing_train_job_retired_n$housing)/num_retired_no
## no unknown yes
## 0.46182495 0.02700186 0.51117318
# => 大きな差はない(yesは少しローン有が多い)
# 個人ローンの有無
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$loan, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$loan, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$loan)/num_retired_yes
## no unknown yes
## 0.82960894 0.02513966 0.14525140
summary(bank_marketing_train_job_retired_n$loan)/num_retired_no
## no unknown yes
## 0.82681564 0.02700186 0.14618250
# => 差はなさそう
# 連絡デバイス
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$contact, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$contact, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$contact)/num_retired_yes
## cellular telephone
## 0.8910615 0.1089385
summary(bank_marketing_train_job_retired_n$contact)/num_retired_no
## cellular telephone
## 0.6741155 0.3258845
# => yesはcellularが多い
# 前回の接触からの経過日数
#plot_ly(x = bank_marketing_train_job_retired$pdays, type="histogram", color = bank_marketing_train_job_retired$y)
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$pdays, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$pdays, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$pdays)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 9.25 999.00 713.19 999.00 999.00
summary(bank_marketing_train_job_retired_n$pdays)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 999.0 999.0 957.4 999.0 999.0
# => yesは日数が短い人の割合が大きい
# 以前のキャンペーン結果
#plot_ly(x = bank_marketing_train_job_retired$poutcome, type="histogram", color = bank_marketing_train_job_retired$y)
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$poutcome, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$poutcome, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$poutcome)/num_retired_yes
## failure nonexistent success
## 0.1424581 0.5837989 0.2737430
summary(bank_marketing_train_job_retired_n$poutcome)/num_retired_no
## failure nonexistent success
## 0.12476723 0.83985102 0.03538175
# => yesはfailure, successが多い
# 以前のキャンペーンの接触回数
pl_yes <- plot_ly(x = bank_marketing_train_job_retired_y$previous, type="histogram", name = "yes")
pl_no <- plot_ly(x = bank_marketing_train_job_retired_n$previous, type="histogram", name = "no")
subplot(pl_yes, pl_no)
# 割合をみてみる
summary(bank_marketing_train_job_retired_y$previous)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.6508 1.0000 4.0000
summary(bank_marketing_train_job_retired_n$previous)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.2086 0.0000 4.0000
# => yesは平均値が大きい
lr3<-glm(y~.-day_of_week-duration-campaign,
data=bank_marketing_train, family="binomial")
summary(lr3)
##
## Call:
## glm(formula = y ~ . - day_of_week - duration - campaign, family = "binomial",
## data = bank_marketing_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9013 -0.4129 -0.3240 -0.2778 2.9504
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.508e+01 1.600e+01 -5.944 2.79e-09 ***
## age 1.383e-03 2.318e-03 0.597 0.55060
## jobblue-collar -1.963e-01 7.484e-02 -2.623 0.00871 **
## jobentrepreneur -8.459e-02 1.153e-01 -0.734 0.46322
## jobhousemaid -1.070e-01 1.389e-01 -0.770 0.44109
## jobmanagement -7.113e-02 8.211e-02 -0.866 0.38631
## jobretired 2.807e-01 1.028e-01 2.731 0.00632 **
## jobself-employed -1.509e-01 1.146e-01 -1.317 0.18772
## jobservices -1.568e-01 8.150e-02 -1.925 0.05428 .
## jobstudent 2.415e-01 1.088e-01 2.219 0.02649 *
## jobtechnician -2.015e-02 6.753e-02 -0.298 0.76540
## jobunemployed -5.372e-02 1.223e-01 -0.439 0.66040
## jobunknown -2.881e-01 2.362e-01 -1.219 0.22267
## maritalmarried 5.616e-02 6.552e-02 0.857 0.39137
## maritalsingle 1.196e-01 7.447e-02 1.605 0.10840
## maritalunknown 6.134e-02 4.103e-01 0.150 0.88115
## educationbasic.6y 4.597e-02 1.151e-01 0.399 0.68959
## educationbasic.9y -5.598e-02 8.954e-02 -0.625 0.53181
## educationhigh.school -1.073e-02 8.735e-02 -0.123 0.90226
## educationilliterate 1.192e+00 6.560e-01 1.817 0.06914 .
## educationprofessional.course 4.639e-02 9.657e-02 0.480 0.63096
## educationuniversity.degree 1.052e-01 8.729e-02 1.206 0.22791
## educationunknown 2.121e-01 1.133e-01 1.872 0.06122 .
## defaultunknown -3.176e-01 6.312e-02 -5.032 4.86e-07 ***
## defaultyes -7.644e+00 8.447e+01 -0.090 0.92790
## housingunknown -3.040e-02 1.279e-01 -0.238 0.81208
## housingyes -4.811e-02 3.933e-02 -1.223 0.22125
## loanunknown NA NA NA NA
## loanyes -8.754e-02 5.471e-02 -1.600 0.10960
## contacttelephone -8.601e-01 6.075e-02 -14.156 < 2e-16 ***
## pdays -1.176e-03 2.207e-04 -5.326 1.00e-07 ***
## previous -5.077e-02 6.156e-02 -0.825 0.40954
## poutcomenonexistent 5.005e-01 9.467e-02 5.287 1.24e-07 ***
## poutcomesuccess 6.782e-01 2.158e-01 3.143 0.00167 **
## emp.var.rate -7.105e-01 6.552e-02 -10.844 < 2e-16 ***
## cons.price.idx 1.099e+00 1.043e-01 10.534 < 2e-16 ***
## cons.conf.idx 3.879e-02 6.027e-03 6.436 1.23e-10 ***
## euribor3m 6.074e-02 8.249e-02 0.736 0.46148
## nr.employed -1.506e-03 1.451e-03 -1.038 0.29949
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 23735 on 33743 degrees of freedom
## Residual deviance: 19114 on 33706 degrees of freedom
## AIC: 19190
##
## Number of Fisher Scoring iterations: 9
## step関数
lr4 <- step(lr3)
## Start: AIC=19189.68
## y ~ (age + job + marital + education + default + housing + loan +
## contact + day_of_week + duration + campaign + pdays + previous +
## poutcome + emp.var.rate + cons.price.idx + cons.conf.idx +
## euribor3m + nr.employed) - day_of_week - duration - campaign
##
## Df Deviance AIC
## - marital 3 19117 19187
## - age 1 19114 19188
## - euribor3m 1 19114 19188
## - previous 1 19114 19188
## - nr.employed 1 19115 19189
## - education 7 19127 19189
## - housing 1 19115 19189
## <none> 19114 19190
## - loan 1 19116 19190
## - job 11 19150 19204
## - default 2 19141 19213
## - pdays 1 19142 19216
## - poutcome 2 19147 19219
## - cons.conf.idx 1 19155 19229
## - cons.price.idx 1 19221 19295
## - emp.var.rate 1 19230 19304
## - contact 1 19327 19401
##
## Step: AIC=19186.58
## y ~ age + job + education + default + housing + loan + contact +
## pdays + previous + poutcome + emp.var.rate + cons.price.idx +
## cons.conf.idx + euribor3m + nr.employed
##
## Df Deviance AIC
## - age 1 19117 19185
## - euribor3m 1 19117 19185
## - previous 1 19117 19185
## - nr.employed 1 19118 19186
## - housing 1 19118 19186
## - education 7 19130 19186
## <none> 19117 19187
## - loan 1 19119 19187
## - job 11 19158 19206
## - default 2 19143 19209
## - pdays 1 19145 19213
## - poutcome 2 19150 19216
## - cons.conf.idx 1 19159 19227
## - cons.price.idx 1 19225 19293
## - emp.var.rate 1 19234 19302
## - contact 1 19331 19399
##
## Step: AIC=19184.58
## y ~ job + education + default + housing + loan + contact + pdays +
## previous + poutcome + emp.var.rate + cons.price.idx + cons.conf.idx +
## euribor3m + nr.employed
##
## Df Deviance AIC
## - euribor3m 1 19117 19183
## - previous 1 19117 19183
## - nr.employed 1 19118 19184
## - housing 1 19118 19184
## - education 7 19130 19184
## <none> 19117 19185
## - loan 1 19119 19185
## - job 11 19160 19206
## - default 2 19144 19208
## - pdays 1 19145 19211
## - poutcome 2 19150 19214
## - cons.conf.idx 1 19159 19225
## - cons.price.idx 1 19225 19291
## - emp.var.rate 1 19234 19300
## - contact 1 19331 19397
##
## Step: AIC=19183.09
## y ~ job + education + default + housing + loan + contact + pdays +
## previous + poutcome + emp.var.rate + cons.price.idx + cons.conf.idx +
## nr.employed
##
## Df Deviance AIC
## - nr.employed 1 19118 19182
## - previous 1 19118 19182
## - housing 1 19119 19183
## - education 7 19131 19183
## <none> 19117 19183
## - loan 1 19120 19184
## - job 11 19160 19204
## - default 2 19145 19207
## - pdays 1 19145 19209
## - poutcome 2 19151 19213
## - cons.conf.idx 1 19227 19291
## - emp.var.rate 1 19248 19312
## - cons.price.idx 1 19256 19320
## - contact 1 19332 19396
##
## Step: AIC=19181.66
## y ~ job + education + default + housing + loan + contact + pdays +
## previous + poutcome + emp.var.rate + cons.price.idx + cons.conf.idx
##
## Df Deviance AIC
## - previous 1 19118 19180
## - housing 1 19119 19181
## - education 7 19132 19182
## <none> 19118 19182
## - loan 1 19120 19182
## - job 11 19161 19203
## - default 2 19145 19205
## - pdays 1 19146 19208
## - poutcome 2 19151 19211
## - cons.conf.idx 1 19253 19315
## - contact 1 19362 19424
## - cons.price.idx 1 19642 19704
## - emp.var.rate 1 20622 20684
##
## Step: AIC=19180.23
## y ~ job + education + default + housing + loan + contact + pdays +
## poutcome + emp.var.rate + cons.price.idx + cons.conf.idx
##
## Df Deviance AIC
## - housing 1 19120 19180
## - education 7 19132 19180
## <none> 19118 19180
## - loan 1 19121 19181
## - job 11 19162 19202
## - default 2 19146 19204
## - pdays 1 19147 19207
## - poutcome 2 19218 19276
## - cons.conf.idx 1 19253 19313
## - contact 1 19363 19423
## - cons.price.idx 1 19667 19727
## - emp.var.rate 1 20653 20713
##
## Step: AIC=19179.66
## y ~ job + education + default + loan + contact + pdays + poutcome +
## emp.var.rate + cons.price.idx + cons.conf.idx
##
## Df Deviance AIC
## - loan 2 19123 19179
## - education 7 19133 19179
## <none> 19120 19180
## - job 11 19163 19201
## - default 2 19147 19203
## - pdays 1 19149 19207
## - poutcome 2 19219 19275
## - cons.conf.idx 1 19256 19314
## - contact 1 19363 19421
## - cons.price.idx 1 19669 19727
## - emp.var.rate 1 20654 20712
##
## Step: AIC=19178.48
## y ~ job + education + default + contact + pdays + poutcome +
## emp.var.rate + cons.price.idx + cons.conf.idx
##
## Df Deviance AIC
## - education 7 19136 19178
## <none> 19123 19179
## - job 11 19166 19200
## - default 2 19150 19202
## - pdays 1 19151 19205
## - poutcome 2 19222 19274
## - cons.conf.idx 1 19259 19313
## - contact 1 19366 19420
## - cons.price.idx 1 19672 19726
## - emp.var.rate 1 20658 20712
##
## Step: AIC=19178.22
## y ~ job + default + contact + pdays + poutcome + emp.var.rate +
## cons.price.idx + cons.conf.idx
##
## Df Deviance AIC
## <none> 19136 19178
## - default 2 19164 19202
## - pdays 1 19165 19205
## - job 11 19191 19211
## - poutcome 2 19237 19275
## - cons.conf.idx 1 19280 19320
## - contact 1 19385 19425
## - cons.price.idx 1 19693 19733
## - emp.var.rate 1 20681 20721
AIC(lr4)
## [1] 19178.22
summary(lr4)
##
## Call:
## glm(formula = y ~ job + default + contact + pdays + poutcome +
## emp.var.rate + cons.price.idx + cons.conf.idx, family = "binomial",
## data = bank_marketing_train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9100 -0.4162 -0.3238 -0.2797 2.9583
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.110e+02 4.650e+00 -23.863 < 2e-16 ***
## jobblue-collar -2.618e-01 6.097e-02 -4.295 1.75e-05 ***
## jobentrepreneur -9.485e-02 1.138e-01 -0.834 0.404516
## jobhousemaid -1.419e-01 1.321e-01 -1.074 0.282787
## jobmanagement -4.861e-02 8.023e-02 -0.606 0.544562
## jobretired 2.660e-01 8.171e-02 3.255 0.001133 **
## jobself-employed -1.380e-01 1.136e-01 -1.214 0.224663
## jobservices -2.120e-01 7.743e-02 -2.738 0.006180 **
## jobstudent 2.540e-01 9.971e-02 2.547 0.010855 *
## jobtechnician -2.225e-02 6.004e-02 -0.371 0.710965
## jobunemployed -9.283e-02 1.206e-01 -0.769 0.441599
## jobunknown -2.458e-01 2.320e-01 -1.060 0.289202
## defaultunknown -3.172e-01 6.211e-02 -5.106 3.28e-07 ***
## defaultyes -7.640e+00 8.448e+01 -0.090 0.927943
## contacttelephone -8.798e-01 5.733e-02 -15.347 < 2e-16 ***
## pdays -1.109e-03 2.059e-04 -5.386 7.22e-08 ***
## poutcomenonexistent 5.607e-01 6.150e-02 9.116 < 2e-16 ***
## poutcomesuccess 7.352e-01 2.071e-01 3.551 0.000384 ***
## emp.var.rate -7.352e-01 1.897e-02 -38.753 < 2e-16 ***
## cons.price.idx 1.190e+00 4.988e-02 23.869 < 2e-16 ***
## cons.conf.idx 4.441e-02 3.720e-03 11.940 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 23735 on 33743 degrees of freedom
## Residual deviance: 19136 on 33723 degrees of freedom
## AIC: 19178
##
## Number of Fisher Scoring iterations: 9
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.